Read in data:
# Data:
CC_dat <- read.csv("/Users/kellyloria/Documents/UNR/Randomness\ readings/CausalityClass/CM_data.csv")
Optional: Create arbitrary variable for temporal auto-correlation in plant size?
# Create lagged variable
CC_dat <- CC_dat %>%
mutate(lag_plant_size = lag(plant_size, default = NA))
Check out parameter covariance:
# covariance correlations
chart.Correlation(CC_dat, histogram=TRUE, pch=19)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
# saturated model:
hist(CC_dat$herbivory)
glb_mod <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) +
scale(lag_plant_size) + scale(foraging),
data=CC_dat)
summary(glb_mod)
##
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) +
## scale(lag_plant_size) + scale(foraging), data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4817 -0.6490 0.0069 0.6568 3.4223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01646 0.03209 -0.513 0.6081
## scale(fertilizer) 0.03942 0.04029 0.979 0.3280
## scale(light) 0.26929 0.04574 5.887 5.38e-09 ***
## scale(plant_size) 0.64003 0.04273 14.980 < 2e-16 ***
## scale(lag_plant_size) -0.06740 0.03214 -2.097 0.0362 *
## scale(foraging) 1.00974 0.03892 25.944 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.014 on 993 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7105, Adjusted R-squared: 0.709
## F-statistic: 487.3 on 5 and 993 DF, p-value: < 2.2e-16
hist(residuals(glb_mod))
vif(glb_mod) # light looks like highest leverage
## scale(fertilizer) scale(light) scale(plant_size)
## 1.575339 2.031419 1.771903
## scale(lag_plant_size) scale(foraging)
## 1.001940 1.470318
glb_mod1 <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + scale(foraging),
data=CC_dat)
summary(glb_mod1)
##
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) +
## scale(foraging), data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5269 -0.6654 -0.0100 0.6762 3.5751
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01714 0.03212 -0.534 0.594
## scale(fertilizer) 0.03956 0.04031 0.981 0.327
## scale(light) 0.27078 0.04580 5.912 4.65e-09 ***
## scale(plant_size) 0.63951 0.04277 14.953 < 2e-16 ***
## scale(foraging) 1.00683 0.03894 25.854 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.016 on 995 degrees of freedom
## Multiple R-squared: 0.7093, Adjusted R-squared: 0.7081
## F-statistic: 606.9 on 4 and 995 DF, p-value: < 2.2e-16
hist(residuals(glb_mod1))
vif(glb_mod1) # light looks like highest leverage
## scale(fertilizer) scale(light) scale(plant_size) scale(foraging)
## 1.573452 2.031273 1.770796 1.468256
glb_modi <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + scale(foraging) + scale(fertilizer * light),
data=CC_dat)
summary(glb_modi)
##
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) +
## scale(foraging) + scale(fertilizer * light), data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5090 -0.6658 -0.0022 0.6780 3.5047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01714 0.03212 -0.534 0.594
## scale(fertilizer) 0.03986 0.04031 0.989 0.323
## scale(light) 0.26876 0.04583 5.865 6.11e-09 ***
## scale(plant_size) 0.64079 0.04277 14.982 < 2e-16 ***
## scale(foraging) 1.00722 0.03894 25.869 < 2e-16 ***
## scale(fertilizer * light) 0.03846 0.03216 1.196 0.232
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.016 on 994 degrees of freedom
## Multiple R-squared: 0.7097, Adjusted R-squared: 0.7082
## F-statistic: 486 on 5 and 994 DF, p-value: < 2.2e-16
hist(residuals(glb_modi))
vif(glb_modi)
## scale(fertilizer) scale(light) scale(plant_size)
## 1.573513 2.034016 1.771900
## scale(foraging) scale(fertilizer * light)
## 1.468358 1.001581
AIC(glb_mod, glb_mod1, glb_modi) # ? better with fake lag...
## Warning in AIC.default(glb_mod, glb_mod1, glb_modi): models are not all fitted
## to the same number of observations
## df AIC
## glb_mod 7 2871.477
## glb_mod1 6 2876.204
## glb_modi 7 2876.766
# Break down component regressions
plant_size_mod <- lm(plant_size ~ fertilizer + light,
data = CC_dat)
summary(plant_size_mod)
##
## Call:
## lm(formula = plant_size ~ fertilizer + light, data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2049 -0.6430 -0.0068 0.6828 3.3435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0007139 0.0307122 0.023 0.981
## fertilizer 0.4967008 0.0363250 13.674 <2e-16 ***
## light 0.5051542 0.0356793 14.158 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9704 on 997 degrees of freedom
## Multiple R-squared: 0.4349, Adjusted R-squared: 0.4338
## F-statistic: 383.7 on 2 and 997 DF, p-value: < 2.2e-16
herb_mod <- lm(herbivory ~ plant_size + light + foraging,
data = CC_dat)
summary(herb_mod)
##
## Call:
## lm(formula = herbivory ~ plant_size + light + foraging, data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5185 -0.6653 0.0023 0.6761 3.6133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004691 0.032127 0.146 0.884
## plant_size 0.508836 0.030435 16.719 < 2e-16 ***
## light 0.283461 0.045091 6.286 4.85e-10 ***
## foraging 0.828113 0.032026 25.857 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.016 on 996 degrees of freedom
## Multiple R-squared: 0.709, Adjusted R-squared: 0.7081
## F-statistic: 808.9 on 3 and 996 DF, p-value: < 2.2e-16
# Use the `psem` function to create the SEM
CC_psem_model <- psem(plant_size_mod, herb_mod)
## Warning: NAs detected in the dataset. Consider removing all rows with NAs to
## prevent fitting to different subsets of data
# Look at object
CC_psem_model
## Structural Equations of x :
## lm: plant_size ~ fertilizer + light
## lm: herbivory ~ plant_size + light + foraging
##
## Data:
## fertilizer light plant_size foraging herbivory lag_plant_size
## 1 0.5208573 -0.3872029 -0.7957658 -0.6570881 -1.6942758 NA
## 2 0.8155222 -0.1671887 -1.4537077 0.4605768 -0.7142346 -0.7957658
## 3 0.8835973 -1.4635119 -0.5343125 -2.0035209 -2.9674024 -1.4537077
## 4 -0.2264909 -1.4187651 0.1868491 0.4587782 -0.1949140 -0.5343125
## 5 -1.4803645 -0.1481271 -0.9388211 -0.1277455 -0.5076546 0.1868491
## 6 0.5107644 1.5497233 -0.3658381 0.9979259 1.2264027 -0.9388211
## ...with 994 more rows
##
## [1] "class(psem)"
# Use `summary` function to get all information at once
summary(CC_psem_model)
##
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
##
## Structural Equation Model of CC_psem_model
##
## Call:
## plant_size ~ fertilizer + light
## herbivory ~ plant_size + light + foraging
##
## AIC
## 5657.861
##
## ---
## Tests of directed separation:
##
## Independ.Claim Test.Type DF Crit.Value P.Value
## herbivory ~ fertilizer + ... coef 995 0.9813 0.3267
## plant_size ~ foraging + ... coef 996 0.7844 0.4330
##
## --
## Global goodness-of-fit:
##
## Chi-Squared = 1.585 with P-value = 0.453 and on 2 degrees of freedom
## Fisher's C = 3.911 with P-value = 0.418 and on 4 degrees of freedom
##
## ---
## Coefficients:
##
## Response Predictor Estimate Std.Error DF Crit.Value P.Value Std.Estimate
## plant_size fertilizer 0.4967 0.0363 997 13.6738 0 0.3747
## plant_size light 0.5052 0.0357 997 14.1582 0 0.3880
## herbivory plant_size 0.5088 0.0304 996 16.7190 0 0.3490
## herbivory light 0.2835 0.0451 996 6.2864 0 0.1493
## herbivory foraging 0.8281 0.0320 996 25.8573 0 0.5355
##
## ***
## ***
## ***
## ***
## ***
##
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05
##
## ---
## Individual R-squared:
##
## Response method R.squared
## plant_size none 0.43
## herbivory none 0.71
# Plot SEM with standardized coefficients
plot(CC_psem_model)
# Get P-values
P1 <- summary(plant_size_mod)$coefficients[2, "Pr(>|t|)"]
P2 <- summary(herb_mod)$coefficients[2, "Pr(>|t|)"]
# Construct C-statistic
C <- -2 * (log(P1) + log(P2))
C
## [1] 429.1085
fisherC(CC_psem_model)
## Fisher.C df P.Value
## 1 3.911 4 0.418
# Compare to chi-squared distribution with 2*2 degrees of freedom
1 - pchisq(C, 4) # P < 0.05 == poor fit!
## [1] 0
# Can use `dsep` function to perform the tests automatically
dSep(CC_psem_model)
##
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
## Independ.Claim Test.Type DF Crit.Value P.Value
## 1 herbivory ~ fertilizer + ... coef 995 0.9812831 0.3266917
## 2 plant_size ~ foraging + ... coef 996 0.7843585 0.4330163
plant_size_modi <- lm(plant_size ~ fertilizer + light + fertilizer*light,
data = CC_dat)
summary(plant_size_mod)
##
## Call:
## lm(formula = plant_size ~ fertilizer + light, data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2049 -0.6430 -0.0068 0.6828 3.3435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0007139 0.0307122 0.023 0.981
## fertilizer 0.4967008 0.0363250 13.674 <2e-16 ***
## light 0.5051542 0.0356793 14.158 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9704 on 997 degrees of freedom
## Multiple R-squared: 0.4349, Adjusted R-squared: 0.4338
## F-statistic: 383.7 on 2 and 997 DF, p-value: < 2.2e-16
herb_mod <- lm(herbivory ~ plant_size + light + foraging,
data = CC_dat)
summary(herb_mod)
##
## Call:
## lm(formula = herbivory ~ plant_size + light + foraging, data = CC_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5185 -0.6653 0.0023 0.6761 3.6133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004691 0.032127 0.146 0.884
## plant_size 0.508836 0.030435 16.719 < 2e-16 ***
## light 0.283461 0.045091 6.286 4.85e-10 ***
## foraging 0.828113 0.032026 25.857 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.016 on 996 degrees of freedom
## Multiple R-squared: 0.709, Adjusted R-squared: 0.7081
## F-statistic: 808.9 on 3 and 996 DF, p-value: < 2.2e-16
# Use the `psem` function to create the SEM
CC_psem_model2 <- psem(plant_size_modi, herb_mod)
## Warning: NAs detected in the dataset. Consider removing all rows with NAs to
## prevent fitting to different subsets of data
# Look at object
CC_psem_model2 # Sometimes it works better to specifiy models in line, with called df
## Structural Equations of x :
## lm: plant_size ~ fertilizer + light + fertilizer * light
## lm: herbivory ~ plant_size + light + foraging
##
## Data:
## fertilizer light plant_size foraging herbivory lag_plant_size
## 1 0.5208573 -0.3872029 -0.7957658 -0.6570881 -1.6942758 NA
## 2 0.8155222 -0.1671887 -1.4537077 0.4605768 -0.7142346 -0.7957658
## 3 0.8835973 -1.4635119 -0.5343125 -2.0035209 -2.9674024 -1.4537077
## 4 -0.2264909 -1.4187651 0.1868491 0.4587782 -0.1949140 -0.5343125
## 5 -1.4803645 -0.1481271 -0.9388211 -0.1277455 -0.5076546 0.1868491
## 6 0.5107644 1.5497233 -0.3658381 0.9979259 1.2264027 -0.9388211
## ...with 994 more rows
##
## [1] "class(psem)"
# Plot SEM with standardized coefficients
plot(CC_psem_model2)
# Use `summary` function to get all information at once
summary(CC_psem_model2)
##
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
##
## Structural Equation Model of CC_psem_model2
##
## Call:
## plant_size ~ fertilizer + light + fertilizer * light
## herbivory ~ plant_size + light + foraging
##
## AIC
## 5659.228
##
## ---
## Tests of directed separation:
##
## Independ.Claim Test.Type DF Crit.Value P.Value
## herbivory ~ fertilizer + ... coef 995 0.9813 0.3267
## plant_size ~ foraging + ... coef 995 0.7771 0.4373
##
## --
## Global goodness-of-fit:
##
## Chi-Squared = 1.574 with P-value = 0.455 and on 2 degrees of freedom
## Fisher's C = 3.892 with P-value = 0.421 and on 4 degrees of freedom
##
## ---
## Coefficients:
##
## Response Predictor Estimate Std.Error DF Crit.Value P.Value
## plant_size fertilizer 0.4962 0.0363 996 13.6551 0.0000
## plant_size light 0.5060 0.0357 996 14.1728 0.0000
## plant_size fertilizer:light -0.0226 0.0285 996 -0.7945 0.4271
## herbivory plant_size 0.5088 0.0304 996 16.7190 0.0000
## herbivory light 0.2835 0.0451 996 6.2864 0.0000
## herbivory foraging 0.8281 0.0320 996 25.8573 0.0000
## Std.Estimate
## 0.3743 ***
## 0.3886 ***
## -0.0189
## 0.3490 ***
## 0.1493 ***
## 0.5355 ***
##
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05
##
## ---
## Individual R-squared:
##
## Response method R.squared
## plant_size none 0.44
## herbivory none 0.71
# Get P-values
P1 <- summary(plant_size_modi)$coefficients[2, "Pr(>|t|)"]
P2 <- summary(herb_mod)$coefficients[2, "Pr(>|t|)"]
# Construct C-statistic
C <- -2 * (log(P1) + log(P2))
C
## [1] 428.6605
fisherC(CC_psem_model2)
## Fisher.C df P.Value
## 1 3.892 4 0.421
AIC(CC_psem_model2, CC_psem_model)
## AIC K n
## 1 5659.228 10 1000
## 2 5657.861 9 1000
plant_size_b <- bf(plant_size ~ fertilizer + light)
herb_b <- bf(herbivory ~ plant_size + light + foraging)
CC_fit_brms <- brm(plant_size_b +
herb_b +
set_rescor(FALSE),
data = CC_dat,
iter = 5000, warmup = 2500,
cores = 3, chains = 3)
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
## extra argument 'resp' will be disregarded
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
## extra argument 'resp' will be disregarded
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.1.0.2.5)’
## using SDK: ‘MacOSX14.2.sdk’
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
## extra argument 'resp' will be disregarded
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
## extra argument 'resp' will be disregarded
Trace plots
plot(CC_fit_brms)
Extract posterior estimates for mean path coefficients
# Extract posterior summaries
posterior_summaries <- posterior_summary(CC_fit_brms)
# Extract estimates and CIs for paths
estimates <- posterior_summaries[, "Estimate"]
ci_lower <- posterior_summaries[, "Q2.5"]
ci_upper <- posterior_summaries[, "Q97.5"]
# Create path labels
fertilizer_to_plant_size <- paste0("mean = ", round(estimates["b_plantsize_fertilizer"], 2),
", CI = [", round(ci_lower["b_plantsize_fertilizer"], 2),
", ", round(ci_upper["b_plantsize_fertilizer"], 2), "]")
light_to_plant_size <- paste0("mean = ", round(estimates["b_plantsize_light"], 2),
", CI = [", round(ci_lower["b_plantsize_light"], 2),
", ", round(ci_upper["b_plantsize_light"], 2), "]")
plant_size_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_plant_size"], 2),
", CI = [", round(ci_lower["b_herbivory_plant_size"], 2),
", ", round(ci_upper["b_herbivory_plant_size"], 2), "]")
light_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_light"], 2),
", CI = [", round(ci_lower["b_herbivory_light"], 2),
", ", round(ci_upper["b_herbivory_light"], 2), "]")
foraging_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_foraging"], 2),
", CI = [", round(ci_lower["b_herbivory_foraging"], 2),
", ", round(ci_upper["b_herbivory_foraging"], 2), "]")
Plot using grViz
# Create the DAG plot
dag_plot <- grViz(paste0("
digraph DAG {
graph [layout = dot, rankdir = TB]
node [shape = box]
fertilizer -> plant_size [label = '", fertilizer_to_plant_size, "']
light -> plant_size [label = '", light_to_plant_size, "']
plant_size -> herbivory [label = '", plant_size_to_herbivory, "']
light -> herbivory [label = '", light_to_herbivory, "']
foraging -> herbivory [label = '", foraging_to_herbivory, "']
plant_size [label = 'Plant Size']
fertilizer [label = 'Fertilizer']
light [label = 'Light']
herbivory [label = 'Herbivory']
foraging [label = 'Foraging']
}
"))
dag_plot